home *** CD-ROM | disk | FTP | other *** search
- {$C-}
- {$G512}
- {$P512}
-
- PROGRAM bigsort(Input, Output);
-
- {*************************************************************************}
- {* Copyright (c) Kim Kokkonen, TurboPower Software, 1985 *}
- {* Released to the public domain for personal, non-commercial use only *}
- {*************************************************************************}
-
- { sort as large a text file as fits in memory }
- { designed as a filter, requires Turbo Pascal 3.0 to compile }
- { written 7/85, phone 408-378-3672 }
- { see options in WRITEHELP, call BIGSORT with no arguments to list options}
- { sorts more than 3x faster than MSDOS SORT for large files }
- { includes a RANDOMIZE feature that aids in sorting presorted files }
- { compile with maximum heap size A000 }
-
- CONST
- maxfile = 15000; {max number of lines in input file.
- limited by 4*maxfile<65000}
- stackparas = 512; {minimum paragraphs to reserve for stack during read-in}
- ss = 9; {sort switchover from quick to bubble}
- toklen = 64; {max length of a command line token}
- maxtok = 4; {max number of tokens on command line}
- optiondelim = '-'; {char used to introduce command line options}
-
- TYPE
- linebuf = STRING[255];
- lineptr = ^byte;
- linearray = ARRAY[1..maxfile] OF lineptr;
-
- VAR
- lines : linearray; {pointers to each text line stored here}
- nlines : Integer; {number of lines}
- showstats, status, partial, upper, reverse : Boolean; {option flags}
- numtocopy, begincol, endcol : Integer; {option values}
- reg : RECORD {register variable}
- ax, bx, cx, dx, bp, si, di, ds, es, flags : Integer;
- END;
-
- PROCEDURE breakhalt;
- {-executed when break is detected}
- {exit with return code 1}
- BEGIN
- INLINE(
- $B8/$01/$4C/ {mov ax,4c01}
- $CD/$21 {int 21}
- );
- END; {breakhalt}
-
- PROCEDURE setbreak;
- {-set the ctrl-break address to a process exit handler}
- BEGIN
- reg.ax := $2523;
- reg.ds := CSeg;
- reg.dx := Ofs(breakhalt);
- MsDos(reg);
- END; {setbreak}
-
- PROCEDURE checkkeys;
- {-capture ^C, ^S, ^Q}
- {note that just calling keypressed should trigger int 23 on control-break}
- VAR
- c : Char;
- BEGIN
- WHILE KeyPressed DO BEGIN
- Read(Kbd, c);
- IF c = ^S THEN
- REPEAT
- Read(Kbd, c);
- IF c = ^C THEN breakhalt;
- UNTIL c = ^Q
- ELSE IF c = ^C THEN
- breakhalt;
- END;
- END; {checkkeys}
-
- FUNCTION iostat(bit : Integer) : Boolean;
- {-check status of the standard I/O}
- {bit=0 for input, 1 for output}
- {returns true if I or O is through console}
- BEGIN
- reg.ax := $4400;
- reg.bx := bit; {standard input or output}
- MsDos(reg);
- iostat := ((reg.dx AND 1) <> 0);
- END; {iostat}
-
- PROCEDURE putline(VAR l : linebuf; VAR lptr : lineptr);
- {-store a string in a contiguous array in the heap}
- {increment the position cursor nextpos}
- {halt if the string won't fit}
- VAR
- len : Byte ABSOLUTE l;
- tlen : Byte;
- space : Integer;
- BEGIN
- tlen := len+1; {length of string including length byte}
- space := MaxAvail;
- IF (space < 0) OR ((space-stackparas) > (1+(tlen SHR 4))) THEN BEGIN
- {enough space left to add string}
- GetMem(lptr, tlen);
- Move(l, lptr^, tlen);
- END ELSE BEGIN
- WriteLn(Con);
- WriteLn(Con, 'not enough memory left to store text....');
- Halt;
- END;
- END; {putline}
-
- FUNCTION getline(lptr : lineptr) : linebuf;
- {-get a string back from the contiguous heap array}
- VAR
- bytestomove : Byte;
- l : linebuf;
- BEGIN
- bytestomove := lptr^+1;
- Move(lptr^, l, bytestomove);
- getline := l;
- END; {getline}
-
- PROCEDURE readinfile(VAR nlines : Integer);
- {-read lines from standard input and put the text on the heap}
- VAR
- l : linebuf;
- BEGIN
- nlines := 0;
- IF status THEN BEGIN
- Write(Con, ^M); ClrEol;
- Write(Con, 'READING ');
- END;
- WHILE NOT EoF DO BEGIN
- {read line}
- ReadLn(l);
- IF nlines < maxfile THEN BEGIN
- nlines := nlines+1;
- IF status AND (nlines AND 31 = 0) THEN
- Write(Con, ^H^H^H^H^H, nlines:5);
- checkkeys;
- {store pointer into text heap}
- {store line on text heap}
- putline(l, lines[nlines]);
- END ELSE BEGIN
- WriteLn(Con);
- WriteLn(Con, 'Exceeded maximum number of lines....');
- Halt;
- END;
- END;
- END; {readinfile}
-
- PROCEDURE writeoutfile(nlines : Integer);
- {-write the sorted information out}
- VAR
- i : Integer;
- l : linebuf;
- BEGIN
- IF showstats THEN Write(Con, 'WRITING ');
- FOR i := 1 TO nlines DO BEGIN
- {for unknown reason, cannot put getline inside of writeln for DOS 2.1}
- l := getline(lines[i]);
- WriteLn(l);
- IF showstats AND (i MOD 32 = 0) THEN
- Write(Con, ^H^H^H^H^H, i:5);
- checkkeys;
- END;
- IF showstats THEN BEGIN
- Write(Con, ^M); ClrEol;
- END;
- END; {writeoutfile}
-
- PROCEDURE Swap(VAR x, y : lineptr);
- {-swap two array pointers}
- VAR
- temp : lineptr;
- BEGIN
- temp := x;
- x := y;
- y := temp;
- END; {swap}
-
- PROCEDURE mixlines(nlines : Integer);
- {-randomize record order to aid quicksort with semi-presorted lists}
- VAR
- i : Integer;
- BEGIN
- FOR i := 1 TO nlines DO
- Swap(lines[i], lines[1+Random(nlines)]);
- END; {mixlines}
-
- PROCEDURE stupcase(VAR l : linebuf);
- {-return uppercase of a string}
- VAR
- i : Byte;
- BEGIN
- FOR i := 1 TO Length(l) DO l[i] := UpCase(l[i]);
- END; {stupcase}
-
- FUNCTION lessthan(l1, l2 : linebuf) : Boolean;
- {-return true if l1<l2 under the option assumptions}
- BEGIN
- IF upper THEN BEGIN
- stupcase(l1);
- stupcase(l2);
- END;
- IF partial THEN BEGIN
- l1 := Copy(l1, begincol, numtocopy);
- l2 := Copy(l2, begincol, numtocopy);
- END;
- IF reverse THEN
- lessthan := (l1 > l2)
- ELSE
- lessthan := (l1 < l2)
- END; {lessthan}
-
- FUNCTION equal(l1, l2 : linebuf) : Boolean;
- {-return true if l1=l2 under the option assumptions}
- BEGIN
- IF upper THEN BEGIN
- stupcase(l1);
- stupcase(l2);
- END;
- IF partial THEN BEGIN
- l1 := Copy(l1, begincol, numtocopy);
- l2 := Copy(l2, begincol, numtocopy);
- END;
- equal := (l1 = l2);
- END; {equal}
-
- PROCEDURE bubblesort(k, l : Integer);
- {-simple n**2 sort good for short lists}
- VAR
- i, j : Integer;
- BEGIN
- FOR i := k TO (l-1) DO
- FOR j := l DOWNTO (i+1) DO
- IF lessthan(getline(lines[j]), getline(lines[j-1])) THEN
- Swap(lines[j], lines[j-1]);
- END; {bubblesort}
-
- PROCEDURE quicksort(i, j : Integer);
- {-fast sorting algorithm modified to be hybrid with bubblesort}
- VAR
- pivot : linebuf;
- k, pivotindex, ramleft : Integer;
- enoughram : Boolean;
-
- PROCEDURE writestatus(i, j : Integer);
- {-provide some reassurance that sort is proceeding}
- BEGIN
- Write(Con, ^H^H^H^H^H); ClrEol;
- {prints size of current partition being sorted}
- Write(Con, (j-i):5);
- END; {writestatus}
-
- FUNCTION findpivot(i, j : Integer) : Integer;
- {-part of quicksort}
- VAR
- firstkey, l : linebuf;
- k : Integer;
- BEGIN
- firstkey := getline(lines[i]);
- FOR k := (i+1) TO j DO BEGIN
- l := getline(lines[k]);
- IF lessthan(l, firstkey) THEN BEGIN
- findpivot := i;
- Exit;
- END ELSE IF NOT(equal(l, firstkey)) THEN BEGIN
- findpivot := k;
- Exit;
- END;
- END;
- findpivot := 0;
- END; {findpivot}
-
- FUNCTION partition(i, j : Integer; VAR pivot : linebuf) : Integer;
- {-part of quicksort}
- VAR
- l, r : Integer;
- BEGIN
- l := i;
- r := j;
- REPEAT
- Swap(lines[l], lines[r]);
- WHILE lessthan(getline(lines[l]), pivot) DO l := l+1;
- WHILE NOT(lessthan(getline(lines[r]), pivot)) DO r := r-1;
- UNTIL l > r;
- partition := l;
- END; {partition}
-
- BEGIN {quicksort}
- checkkeys; {check for a break}
- IF status THEN writestatus(i, j);
- pivotindex := findpivot(i, j);
- IF pivotindex <> 0 THEN BEGIN
- pivot := getline(lines[pivotindex]);
- k := partition(i, j, pivot);
- ramleft := MemAvail;
- enoughram := (ramleft < 0) OR (ramleft > 32);
- IF ((k-1-i) > ss) AND enoughram THEN
- quicksort(i, k-1)
- ELSE
- bubblesort(i, k-1);
- IF ((j-k) > ss) AND enoughram THEN
- quicksort(k, j)
- ELSE
- bubblesort(k, j);
- END;
- END; {quicksort}
-
- PROCEDURE writehelp;
- {-display a help screen}
- BEGIN
- WriteLn(Con);
- WriteLn(Con, 'Usage: BIGSORT [Options] <InputPathname [>OutputPathName]');
- LowVideo;
- WriteLn(Con);
- WriteLn(Con, 'Sort limited in size only by available RAM.');
- WriteLn(Con, '384K free RAM will sort a 256K file of 7000 lines.');
- WriteLn(Con, 'Each text line limited to 255 characters and must be terminated by a <CR><LF>.');
- WriteLn(Con, 'Maximum of 15000 text lines.');
- WriteLn(Con, 'Input text is automatically randomized to avoid presorting problems.');
- WriteLn(Con);
- NormVideo;
- WriteLn(Con, 'Options:');
- LowVideo;
- WriteLn(Con, ' -I Ignore case while sorting');
- WriteLn(Con, ' -R sort in Reverse order');
- WriteLn(Con, ' -Bn Begin sort key with column n of each line (default 1)');
- WriteLn(Con, ' -En End sort key with column n of each line (default end of line)');
- WriteLn(Con, ' -Q Quiet mode. No status during sort');
- NormVideo;
- END; {writehelp}
-
- PROCEDURE setoptions;
- {-analyze input line}
- VAR
- i, code : Integer;
- num : STRING[6];
- arg : STRING[64];
- BEGIN
- {set defaults}
- upper := False; reverse := False; status := True;
- begincol := 1; endcol := 255; partial := False;
-
- WriteLn(Con);
-
- {scan through argument list}
- i := 1;
- WHILE i <= ParamCount DO BEGIN
- arg := ParamStr(i);
- IF (arg[1] = optiondelim) AND (Length(arg) > 1) THEN BEGIN
- CASE UpCase(arg[2]) OF
- 'I' : upper := True;
- 'R' : reverse := True;
- 'Q' : status := False;
- 'B' : BEGIN
- num := Copy(arg, 3, 6);
- Val(num, begincol, code);
- IF code <> 0 THEN BEGIN
- WriteLn(Con, 'Illegal Begin column specified: ', arg);
- writehelp;
- Halt;
- END;
- IF (begincol <= 0) OR (begincol > 255) THEN BEGIN
- WriteLn(Con, 'Illegal Begin column specified: ', arg);
- WriteLn(Con, ' column must be in the range of 1..255');
- writehelp;
- Halt;
- END;
- IF begincol > 1 THEN partial := True;
- END;
- 'E' : BEGIN
- num := Copy(arg, 3, 6);
- Val(num, endcol, code);
- IF code <> 0 THEN BEGIN
- WriteLn(Con, 'Illegal End column specified: ', arg);
- writehelp;
- Halt;
- END;
- IF (endcol <= 0) OR (endcol > 255) THEN BEGIN
- WriteLn(Con, 'Illegal End column specified: ', arg);
- WriteLn(Con, ' column must be in the range of 1..255');
- writehelp;
- Halt;
- END;
- IF endcol < 255 THEN partial := True;
- END;
- ELSE
- WriteLn(Con, 'Unrecognized command line option: ', arg);
- writehelp;
- Halt;
- END;
- END ELSE BEGIN
- WriteLn(Con, 'Unrecognized command line option: ', arg);
- writehelp;
- Halt;
- END;
- i := i+1;
- END;
- numtocopy := endcol-begincol+1;
- showstats := status AND NOT(iostat(1));
- END; {setoptions}
-
- FUNCTION ramavail : Real;
- {-return the number of bytes of RAM left for heap and stack}
- VAR
- t : Real;
- BEGIN
- t := MaxAvail;
- IF t < 0 THEN t := 65536.0+t;
- ramavail := 16.0*t;
- END; {ramavail}
-
- BEGIN {main}
-
- IF iostat(0) THEN BEGIN
- WriteLn(Con);
- WriteLn(Con, 'input must be redirected from a file....');
- writehelp;
- Halt;
- END;
-
- {analyze command line options}
- setoptions;
-
- {make sure we can break out}
- setbreak;
-
- IF status THEN
- WriteLn(Con, 'Total RAM for heap and stack: ', ramavail:6:0);
-
- {read in the input file}
- readinfile(nlines);
-
- IF status THEN BEGIN
- Write(Con, ^M); ClrEol;
- WriteLn(Con, 'RAM left for recursion stack: ', ramavail:6:0);
- WriteLn(Con, 'Total lines: ', nlines);
- END;
-
- {randomize records}
- IF status THEN Write(Con, 'RANDOMIZING');
- mixlines(nlines);
-
- {sort}
- IF status THEN BEGIN
- Write(Con, ^M); ClrEol;
- Write(Con, 'SORTING ');
- END;
- IF nlines > ss THEN quicksort(1, nlines) ELSE bubblesort(1, nlines);
- IF status THEN BEGIN
- Write(Con, ^M); ClrEol;
- END;
-
- {write out the results}
- writeoutfile(nlines);
-
- END.
-